import  os
import cv2
import numpy as np





def load_data(datasetdir):
    """Load a folder-per-class image dataset as shuffled grayscale arrays.

    Every sub-directory of ``datasetdir`` is treated as one class: each file
    inside it is read with ``cv2.imread`` in grayscale mode and labelled with
    the sub-directory's name.

    Args:
        datasetdir: Path of the dataset root directory.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays in the same (shuffled)
        order. ``labels[i]`` is the class-folder name of ``images[i]``.
        Entries of ``datasetdir`` that are not directories, and files that
        OpenCV cannot decode, are skipped.
    """
    images = []
    labels = []
    filenamelist = []

    # Collect the path of every file in every class folder. os.path.join
    # keeps this working on any OS instead of hard-coding backslashes.
    for folder in os.listdir(datasetdir):
        folderpath = os.path.join(datasetdir, folder)
        if not os.path.isdir(folderpath):
            # Stray files in the dataset root are not class folders.
            continue
        for filename in os.listdir(folderpath):
            filenamelist.append(os.path.join(folderpath, filename))

    print('获取文件列表成功。。。')

    # Shuffle the file order in place.
    filenamelist = np.array(filenamelist)
    np.random.shuffle(filenamelist)

    # Read each image and derive its label from the parent folder name.
    print('要读取%d个图片。。。' % (len(filenamelist)))

    for index, filename in enumerate(filenamelist, start=1):
        filename = str(filename)
        img = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # imread signals failure by returning None; keeping it would
            # put a non-image entry into the result array.
            print('无法读取图片，已跳过：%s' % filename)
        else:
            images.append(img)
            labels.append(os.path.basename(os.path.dirname(filename)))
        if index % 10000 == 0:
            print('第%d个图片' % index)

    images = np.array(images)
    labels = np.array(labels)

    print(images.shape)
    print(labels.shape)
    return images, labels

if __name__ == '__main__':
    # Manual smoke test: load the dataset, print the array shapes, then show
    # one sample together with its label.
    datasetdir = r'D:\Users\hehp\Desktop\tmp'
    trainimg, trainlabel = load_data(datasetdir)

    print(trainimg.shape)
    print(trainlabel.shape)

    if len(trainlabel) > 0:
        # Preview sample 1000 when it exists; otherwise fall back to the last
        # sample so small datasets do not raise IndexError.
        sample = min(1000, len(trainlabel) - 1)
        print(trainlabel[sample])

        cv2.imshow('a', trainimg[sample])
        cv2.waitKey(0)
